from lxml import etree

# Sample HTML document (doctype included) used by the HTML-parser demos.
html_str = '''
<!doctype html>
<html>
  <body>
    <h1 class="title">Hello LXML</h1>
    <ul id="books">
      <li>Python 基础</li>
      <li>爬虫进阶</li>
    </ul>
  </body>
</html>
'''
# Demo 1: etree.HTML() parses the string with lxml's HTML parser; the result
# is the element that the path query below is run against.
tree = etree.HTML(html_str)
# Collect every <li> descendant into a list.
lis = list(tree.iterfind('.//li'))
# Uncomment to print the text of each list item:
# for li in lis:
#     print(li.text)

# The same markup as the HTML sample but without the doctype line; it is fed
# to the XML parser below.
xml_str = """
<html>
  <body>
    <h1 class="title">Hello LXML</h1>
    <ul id="books">
      <li>Python 基础</li>
      <li>爬虫进阶</li>
    </ul>
  </body>
</html>
"""
# Demo 2: etree.XML() parses the string as XML.
tree = etree.XML(xml_str)
# Collect every <li> descendant into a list.
lis = list(tree.iterfind('.//li'))
# Uncomment to print the text of each list item:
# for li in lis:
#     print(li.text)

# Demo 3: etree.fromstring() uses etree.XMLParser() by default, so an
# HTMLParser is supplied explicitly to parse the HTML sample.
tree = etree.fromstring(html_str, etree.HTMLParser())
# Collect every <li> descendant into a list.
lis = list(tree.iterfind('.//li'))
# Uncomment to print the text of each list item:
# for li in lis:
#     print(li.text)


# etree.parse() defaults to parser=etree.XMLParser(); pass an HTMLParser explicitly for HTML files.
# tree = etree.parse('11111.html',parser=etree.HTMLParser())
# title = tree.find('.//title').text
# print(title)
# lis = tree.findall('.//li[@id="li1"]')
# for li in lis:
#     print(li.text)

